* Review of Economics and Statistics MS 23551

/*

This Stata(14) program creates all of the tables and figures reported in the Online Appendix of:

  - Jin, Miao, Yu-Jane Liu, Juanjuan Meng, and Yu Zhang. "Transmission of Income Variations to Consumption Variations: 
    The Role of the Firm"

Data: We use a dataset on income, consumption, and financials from a leading commercial bank in Asia (henceforth “Bank”). 
    Due to a confidentiality agreement with the Bank, we cannot publicly distribute the data. After getting permission 
    from the Bank, we will provide contact information to anyone who would like to contact it for access to use the data 
    for replication.

Data files for the analysis (the construction of these data files is detailed in ConstructData.do):
	reg_yr_main.dta               : The main credit card sample, aggregated to the annual frequency (baseline sample for the analysis.)
    reg_yr_all.dta 		          : The all direct depositor sample, aggregated to the annual frequency
    reg_crsection_main.dta        : The main credit card sample, aggregated to the cross sectional level
    reg_semiannual_main.dta       : The main credit card sample, aggregated to the semi-annual frequency
    reg_mon_main.dta              : The main credit card sample, aggregated to the monthly frequency
    nmcluster.dta                 : The income cluster categories for workers in each firm created in Main.do
	jointtest.dta                 : The joint test of channel intermediate dataset created in Main.do


Key regressors: 
	logcon(_r)					  : (Residual measures of) Natural logarithm of total period spending at individual level
	logincome(_r)				  : (Residual measures of) Natural logarithm of total period earnings at individual level
	log_income_between(_r)		  : (Residual measures of) Natural logarithm of average period earnings in a firm.
	log_income_within(_r)		  : (Residual measures of) Natural logarithm of total period earnings at individual level relative to natural logarithm of average period earnings in the firm.


Other key variables:
	con							  : total period spending at individual level
	income						  : total period earnings at individual level
	saving_bal					  : period end balance of deposits
	debt_bal					  : period end balance of debt, which includes credit card debt and installment loans
	age 						  : age of the individual
	gender						  : = 1 for female, and 0 otherwise
	marriage 					  : = 1 if the individual is married, and 0 otherwise
	education					  : education categories, including graduate degree, bachelor’s degree, associate’s degree, high school, middle school and below.
	ocpclass					  : job position categories, including blue collar, white collar, and managerial workers.
	firmsize					  : number of employees in a firm
	wealth                      : average account wealth over sample period
	wealthtoincome              : the wealth to income ratio (average wealth over average income for the sample period)
	log_income_between_cluster(_r): (Residual measures of) the difference between log average earnings of the income cluster and log firm average earnings
	log_income_within_cluster(_r) : (Residual measures of) the difference between log earnings and log average earnings of the income cluster
	diff 						  : the estimated average differences in consumption pass-throughs between within-firm and between-firm income changes 
	persisdiff_sd 				  : the extent of the persistence difference between the between-firm component of income and the within-firm component at the region × industry level, standardized to have a mean of zero and a unit variance
	varianceratio_sd 			  : the degree to which wealthier employees are more exposed to variance of innovations in the within-firm component of income than poorer employees in the region × industry subsample, standardized to have a mean of zero and a unit variance
	peeravg_sd					  : the average degree of peer effect for all employees in the region × industry subsample, standardized to have a mean of zero and a unit variance
    industry                      : industry categories, including agriculture, manufacturing, utilities and public service, commerce, finance, science and education, and healthcare
    region                        : region identifiers
	maxcls                        : the optimal number of income clusters in a firm
	firmAvgIncome                 : average employee period earnings in a firm.
	cash1                         : the ATM cash withdrawals between 100 and 20,000 in local currency 
	cash2                         : the ATM cash withdrawals between 100 and 5,000 in local currency 
*/

// Root folder for every exported figure and table in this script: all graph export,
// esttab, outreg2 and putexcel calls below write under $outputdir.
// NOTE(review): the quoted text is a placeholder that must be replaced with a real path.
// As written it is delimited like a Stata local macro, so it presumably expands to an
// empty string -- confirm before running.
global outputdir "`where you save the results'"
// Folder holding the input .dta files read by the "use" and "merge" commands below; the
// Table A2 section also saves its intermediate bonus_column1-4 datasets here.
global workdir "`where the test data are placed'"


***************************************
******   FigureA1
***************************************
**  Distribution of the Optimal Number of Clusters
**
**  Input : $workdir/nmcluster  (maxcls = optimal number of income clusters in a firm)
**  Output: $outputdir/figurea1.png

use "$workdir/nmcluster",clear     
	// maxcls is a firm-level quantity: keep one row per firm so each firm is counted once
	bysort company_no: keep if _n==1
	histogram maxcls, freq addl ytitle("Number of firms") xtitle("Optimal number of clusters") xlabel(2 "2" 3 "3" 4 "4" 5 "5 or more",notick) barw(0.2) color(gs8) graphr(color(white))
		// replace: without it graph export stops with "file already exists" on a re-run
		graph export "$outputdir/figurea1.png", as(png) replace

***************************************
******	Figure A2
***************************************
** The distribution of Log Changes in Employee Income against Different Percentiles of Changes in the Log Firm Average Earnings
**
** Input : $workdir/reg_yr_main (annual panel)
** Output: $outputdir/figurea2.png
use "$workdir/reg_yr_main",clear

	sort id year

	// Within each id (sorted by year): second observation minus first observation.
	// The result is constant across the rows of an id.
	by id: gen growth_income_between=log_income_between[2]-log_income_between[1]
	by id: gen growth_income=logincome[2]-logincome[1]
	// Percentile bin (1-100) of the change in log firm average earnings
	xtile ibin=growth_income_between , n(100)   

	// One row per bin: percentiles of individual income growth inside the bin.
	// m_b (mean firm-level growth in the bin) is computed but not used by the plot below.
	collapse (p10) p10_i=growth_income   (p25) p25_i=growth_income  (p50) p50_i=growth_income    ///
			 (p75) p75_i=growth_income   (p90) p90_i=growth_income  (mean) m_b=growth_income_between, by(ibin)
	// Smoothed (local polynomial) percentile curves against the bin number.
	// NOTE(review): the numbers in parentheses in the x-axis labels are hard-coded text,
	// presumably the firm-level growth at those percentiles in the authors' data -- confirm.
	twoway (lpoly p10_i ibin,lp(longdash_dot) bwidth(1)) (lpoly p25_i ibin,lp(longdash) bwidth(1))(lpoly p50_i ibin,lp(solid) bwidth(1)) (lpoly p75_i ibin,lp(shortdash) bwidth(1)) (lpoly p90_i ibin,lp(dash_dot) bwidth(1)),ytitle("Log Changes in Worker Earnings") xtitle("Percentiles of Log Changes in Firm Average Earnings") xlabel(1 `" "1th" "(-0.49)" "' 25 `" "25th" "(0.01)" "' 50 `" "50th" "(0.09)" "' 75 `" "75th" "(0.15)" "' 99 `" "99th" "(0.58)""' ) legend(label(1 "10th percentile") label(2 "25th percentile") label(3 "median") label(4 "75th percentile") label(5 "90th percentile") rows(2)) graphr(color(white)) 
		// replace: without it graph export stops with "file already exists" on a re-run
		graph export "$outputdir/figurea2.png", as(png) replace




***************************************
******	Table A1     
***************************************
** Summary Statistics
**
** Input : $workdir/reg_yr_all (all direct depositor sample, annual)
** Output: $outputdir/tablea1 (csv); person-level statistics first, firm size appended

use "$workdir/reg_yr_all",clear

	local varlist con income saving_bal debt_bal age gender marriage education_1-education_5 ocpclass_1-ocpclass_3
	// Start from an empty eststo stack: esttab without a name list tabulates every set
	// stored by eststo, including leftovers from earlier commands or sessions.
	eststo clear
	eststo: estpost sum `varlist', detail  
		// replace: without it esttab refuses to overwrite an existing file on a re-run
		esttab using "$outputdir/tablea1", cells( "count mean sd  p25 p50 p75") csv noobs plain replace

	// Firm size is a firm-level variable: summarise it over one row per firm
	bysort company_no: keep if _n==1
	// Clear again so the appended table holds only the firm-size statistics and does not
	// repeat the person-level set stored above.
	eststo clear
	eststo: estpost sum  firmsize , detail
		esttab using "$outputdir/tablea1", cells( "count mean sd  p25 p50 p75") csv noobs plain append  


***************************************
******	Table A2
***************************************
** The Role of Bonuses in the Persistence Difference 
**
** Builds four alternative definitions of "bonus months" from the monthly panel, saves one
** intermediate dataset per definition in $workdir (bonus_column1 ... bonus_column4), and
** for each definition regresses an income component on its one-year lag, interacted with
** indicators for within-firm (vs. between-firm) income and bonus (vs. non-bonus) months.
**
** Input : $workdir/reg_mon_main
** Output: $outputdir/tablea2.xls, one column per definition (titled 1-4)

use "$workdir/reg_mon_main", clear

	keep id company_no ym yyyymm income firmAvgIncome
	// Two 12-month sample years: 2013m7-2014m6 -> year 1, 2014m7-2015m6 -> year 2
	recode yyyymm (201307/201406 = 1) (201407/201506 = 2), generate(year)
	gen month = mod(yyyymm,100)

	// Missing values compare greater than any number in Stata, so a missing income must be
	// excluded explicitly; otherwise it would be flagged as a bonus-sized pay.
	bysort id year: egen medincome = median(income)
	gen  med2incomemonth_tmp = (income>2*medincome) & !missing(income) // has received a pay that is higher than 2*median pay of the year
	bysort id year: egen maxincome = max(income)
	gen  maxincomemonth_tmp = (income==maxincome) & !missing(income)   // has received a pay that is highest of the year 

	preserve 
		// Column 1: January and July, but only include employees who have received at least 
		// one bonus in January or July that is larger than twice the median monthly pay 
		// of the year. 

		gen bonus_month = inlist(month,1,7) // January and July
		gen touse_definition1_tmp = bonus_month & (med2incomemonth_tmp==1)
		bysort id: egen touse_definition1 = max(touse_definition1_tmp)
		keep if touse_definition1 == 1 // drop employees w/o bonus months
		save "$workdir/bonus_column1", replace
	restore

	preserve 
		// Column 2: January and July, but only include employees who have received at least 
		// one bonus in January or July that is larger than twice the median monthly pay 
		// of the year AND have received at least one bonus in January or July that is the 
		// highest for the year. 

		gen bonus_month = inlist(month,1,7) // January and July
		gen touse_definition2_tmp = bonus_month & (med2incomemonth_tmp==1) & (maxincomemonth_tmp==1)
		bysort id: egen touse_definition2 = max(touse_definition2_tmp)
		keep if touse_definition2 == 1 // drop employees w/o bonus months
		save "$workdir/bonus_column2", replace
	restore

	preserve 
		// Column 3: Any month that the employee received a pay higher than twice the median 
		// monthly pay of the year; carries over to the same month in both years. 

		bysort id month: egen bonus_month = max(med2incomemonth_tmp) // carries over to both years
		bysort id: egen touse_definition3 = max(bonus_month)
		keep if touse_definition3 == 1 // drop employees w/o bonus months
		save "$workdir/bonus_column3", replace
	restore

	preserve 
		// Column 4: Any month that the employee received a pay higher than twice the median 
		// monthly pay of the year AND is the highest of the year; carries over to the same 
		// month in both years. 

		bysort id month: egen bonus_month = max(med2incomemonth_tmp * maxincomemonth_tmp) // carries over to both years
		bysort id: egen touse_definition4 = max(bonus_month)
		keep if touse_definition4 == 1 // drop employees w/o bonus months
		save "$workdir/bonus_column4", replace
	restore

	forvalues i = 1/4 {
		use "$workdir/bonus_column`i'", clear
		// Split individual and firm-average income by bonus vs. non-bonus months
		gen    bonus_income    		= income 		if bonus_month==1
		gen nonbonus_income 		= income 		if bonus_month==0
		gen    bonus_firmAvgIncome  = firmAvgIncome if bonus_month==1
		gen nonbonus_firmAvgIncome 	= firmAvgIncome if bonus_month==0
		// Yearly means per worker; suffix 1 = bonus months, suffix 0 = non-bonus months
		collapse (mean) income1 = bonus_income  ///
					    income0 = nonbonus_income  ///
					    firmAvgIncome1 = bonus_firmAvgIncome  ///
					    firmAvgIncome0 = nonbonus_firmAvgIncome  ///
					    , by(id year)
		reshape long income firmAvgIncome, i(id year) j(bonus)
		// the data now is a panel. Each worker has two selves, representing her in bonus months and in nonbonus months
		gen logincome = log(income)
		gen log_between_income = log(firmAvgIncome)
		gen log_within_income = logincome - log_between_income
		// y0 = between-firm component, y1 = within-firm component (stacked by the reshape below)
		gen y0 = log_between_income
		gen y1 = log_within_income
		reshape long y, i(id year bonus) j(within)
		// the data now is a panel. Each worker has four newids, representing her in bonus months and in nonbonus months * within v.s. between income.
		egen newid = group(id bonus within)
		xtset newid year

		reg y L.y cL.y#c.within cL.y#c.bonus cL.y#c.within#c.bonus bonus within c.bonus#c.within, vce(cluster id)
		// Column title is the definition number (the loop index). The first column starts
		// a fresh file so a re-run does not stack onto results from a previous run.
		local mode append
		if `i'==1 local mode replace
		outreg2 using "$outputdir/tablea2.xls", dec(3) cttop(`i') `mode'
		}


***************************************
******	Table A3
***************************************
** Different Persistence (Residual Measures of Income)
**
** Input : $workdir/reg_yr_main
** Output: $outputdir/tablea3.xls (columns "within" and "between")

use "$workdir/reg_yr_main", clear
xtset id year

// Reported columns: each residual income component regressed on its own one-year lag,
// standard errors clustered by individual. The first column is written without append.
local mode
foreach part in within between {
	regress log_income_`part'_r L.log_income_`part'_r, vce(cluster id)
	outreg2 using "$outputdir/tablea3.xls", dec(3) cttop(`part') `mode'
	local mode append
}

// Cross-equation test that the two lag coefficients are equal. The same two regressions
// are re-estimated on explicit lag variables without a vce() option, stored, and combined
// with suest, which supplies the id-clustered covariance.
foreach part in within between {
	generate L_log_income_`part'_r = L.log_income_`part'_r
	regress log_income_`part'_r L_log_income_`part'_r
	estimates store `part'
}
suest within between, vce(cluster id)
test [within_mean]L_log_income_within_r = [between_mean]L_log_income_between_r



***************************************
******	Table A4 
***************************************
** Self-Insurance and the Variances of the Between and Within Components of Income Growth (Residual Measures of Income and Consumption)
**
** Input : $workdir/reg_yr_main
** Output: $outputdir/tablea4a.xls (Panel A regressions)
**         $outputdir/tablea4b     (Panel B variance matrix, written by putexcel)

use "$workdir/reg_yr_main",clear

	xtset id year

**	PanelA
	// For each wealth measure: indicator for being above the sample median, then the
	// consumption-growth regression with the income-growth x indicator interaction.
	// NOTE(review): observations with a missing wealth measure get indicator = 0 (the
	// "& x < ." term makes the expression 0, not missing), so they are grouped with the
	// below-median observations here and in Panel B -- confirm this is intended.
	local mode
	foreach v in wealth wealthtoincome {
		quietly sum `v', detail
		gen high`v' = `v' > r(p50) & `v' <.
		reg S.logcon_r S.logincome_r cS.logincome_r#c.high`v', vce(cluster id)
			outreg2 using "$outputdir/tablea4a.xls", dec(3) cttop(high`v') `mode'
		local mode append
	}

**	PanelB
	// WSS: row 1 = variance of the between-firm income growth residual,
	//      row 3 = variance of the within-firm income growth residual (row 2 stays missing).
	// Columns: 1 full sample, 2-3 low/high wealth, 4-5 low/high wealth-to-income.
	capture matrix drop WSS
	matrix WSS = J(3,5,.)

	local col = 0
	foreach cond in "1" "highwealth==0" "highwealth==1" "highwealthtoincome==0" "highwealthtoincome==1" {
		local ++col
		sum S.log_income_between_r if `cond'
		matrix WSS[1, `col'] = r(Var)
		sum S.log_income_within_r if `cond'
		matrix WSS[3, `col'] = r(Var)
	}

	matrix list WSS
	// replace: without replace or modify, putexcel refuses to write to an existing workbook,
	// which stops the script on a re-run
	putexcel A1=matrix(WSS) using "$outputdir/tablea4b", replace






***************************************
******	Table A5    
***************************************
** Difference in Consumption Transmission: Within-Firm Clusters (Residual Measures of Income and Consumption)
**
** Input : $workdir/reg_yr_main
** Output: $outputdir/tablea5.xls

use "$workdir/reg_yr_main", clear
xtset id year

// Consumption growth on total income growth plus the growth of the between-cluster and
// within-cluster income components; standard errors clustered by individual.
regress S.logcon_r S.logincome_r S.log_income_between_cluster_r S.log_income_within_cluster_r, vce(cluster id)

// Test of equal coefficients on the two cluster components; its p-value is attached to
// the exported table as an extra statistic.
test S.log_income_between_cluster_r = S.log_income_within_cluster_r
local pval_b2_b3 = r(p)
outreg2 using "$outputdir/tablea5.xls", dec(3) adds(Prob>F b2_b3, `pval_b2_b3')


***************************************
******	Table A6
***************************************
** Joint Tests of Channels: Income Persistence, Self-insurance, and Peer Effect (Residual Measures of Income and Consumption)
**
** Input : $workdir/reg_yr_main, $workdir/jointtest (merged on industryregion)
** Output: $outputdir/tablea6.xls (columns 1-4)

use "$workdir/reg_yr_main", clear

xtset id year

// Number the industry x region cells 1, 2, ... in sort order: flag the first row of each
// cell, then take the running sum of the flags.
bysort industry region: generate count_industryregion = 1 if _n==1
generate industryregion = sum(count_industryregion)

levelsof industryregion, local(cells)

xtset id year

// diff_r: coefficient on within-firm income growth from a cell-by-cell regression of
// consumption growth on total and within-firm income growth (residual measures).
// Cells in which the regression cannot be run keep diff_r missing.
generate diff_r = .
foreach g of local cells {
	capture regress S.logcon_r S.logincome_r S.log_income_within_r if industryregion==`g', vce(cluster id)
	if _rc==0 {
		replace diff_r = _b[S.log_income_within_r] if industryregion==`g'
	}
}

// Collapse to one row per cell and attach the cell-level channel measures
bysort industryregion: keep if _n==1
merge 1:1 industryregion using "$workdir/jointtest"
drop _merge

// Columns 1-3: one channel at a time; column 4: all three jointly.
// Weighted by Nworkers, heteroskedasticity-robust standard errors.
local col = 0
local mode
foreach rhs in "persisdiff_sd" "varianceratio_sd" "peeravg_sd" "persisdiff_sd varianceratio_sd peeravg_sd" {
	local ++col
	regress diff_r `rhs' [aw = Nworkers], vce(robust)
	outreg2 using "$outputdir/tablea6.xls", dec(3) cttop(`col') `mode'
	local mode append
}
 
 
 
 
***************************************
******	Table A7, columns 1-4
***************************************
** Difference in Consumption Transmission: Robustness to Different Sampling Frequencies and Firm Hierarchical Structures
**
** Output: $outputdir/tablea7panela.xls (consumption on income only)
**         $outputdir/tablea7panelb.xls (adds the within-firm income component)

**	column 1: cross-sectional sample, variables in levels

use "$workdir/reg_crsection_main", clear

regress logcon logincome, vce(cluster id)
outreg2 using "$outputdir/tablea7panela.xls", dec(3) cttop(crosssectional)
regress logcon logincome log_income_within, vce(cluster id)
outreg2 using "$outputdir/tablea7panelb.xls", dec(3) cttop(crosssectional)

**	columns 2 and 3: semi-annual and monthly panels, variables in first differences

foreach freq in semiannual month {
	local data = cond("`freq'"=="semiannual", "reg_semiannual_main", "reg_mon_main")
	local tvar = cond("`freq'"=="semiannual", "halfyear", "ym")

	use "$workdir/`data'", clear
	xtset id `tvar'

	regress S.logcon S.logincome, vce(cluster id)
	outreg2 using "$outputdir/tablea7panela.xls", dec(3) cttop(`freq') append
	regress S.logcon S.logincome S.log_income_within, vce(cluster id)
	outreg2 using "$outputdir/tablea7panelb.xls", dec(3) cttop(`freq') append
}

**	column 4: annual panel with an alternative between-firm component

use "$workdir/reg_yr_main", clear
xtset id year

// Year by year, regress log income on the job-position dummies while absorbing firm
// effects; the absorbed firm effect is saved as that year's between-firm component.
forvalues y = 1/2 {
	reghdfe logincome ocpclass_1 ocpclass_2 ocpclass_3 if year==`y', absorb(log_income_between_alt_`y'=company_no) noconstant
}
generate log_income_between_alt = log_income_between_alt_1 if year==1
replace  log_income_between_alt = log_income_between_alt_2 if year==2
// Alternative within-firm component: log income net of the saved firm effect
generate log_income_within_alt = logincome - log_income_between_alt

regress S.logcon S.logincome, vce(cluster id)
outreg2 using "$outputdir/tablea7panela.xls", dec(3) cttop(c4) append
regress S.logcon S.logincome S.log_income_within_alt, vce(cluster id)
outreg2 using "$outputdir/tablea7panelb.xls", dec(3) cttop(c4) append


***************************************
******	Table A8
***************************************
** Effects of within-firm versus between-firm income innovations on consumption payment methods
**
** Input : $workdir/reg_yr_main
** Output: $outputdir/tablea8.xls (columns "cash1" and "cash2")
use "$workdir/reg_yr_main", clear
xtset id year

// Ratio of ATM cash withdrawals (two alternative definitions) to total period spending
forvalues k = 1/2 {
	generate cash_to_creditcard_`k' = cash`k'/con
}

// Change in the ratio on total and within-firm income growth, one column per definition
local mode
forvalues k = 1/2 {
	regress S.cash_to_creditcard_`k' S.logincome S.log_income_within, vce(cluster id)
	outreg2 using "$outputdir/tablea8.xls", dec(3) cttop(cash`k') `mode'
	local mode append
}
	
	
***************************************
******	Table A9
***************************************
** Difference in Consumption Transmission: Alternative Measurement of Consumption
**
** Input : $workdir/reg_yr_main
** Output: $outputdir/tablea9.xls (columns "cash1" and "cash2")

// Load and declare the panel explicitly, like every other section, so this table can be
// run on its own instead of depending on whatever data a previous section left in memory.
use "$workdir/reg_yr_main",clear

	xtset id year

	// Alternative consumption measure: total spending plus ATM cash withdrawals, in logs
	gen logcon_plus_cash1=log(cash1+con)
	gen logcon_plus_cash2=log(cash2+con)
	reg S.logcon_plus_cash1 S.logincome S.log_income_within ,vce(cluster id)    
		outreg2 using "$outputdir/tablea9.xls", dec(3) cttop(cash1) 
	reg S.logcon_plus_cash2 S.logincome S.log_income_within ,vce(cluster id)    
		outreg2 using "$outputdir/tablea9.xls", dec(3) cttop(cash2) append


***************************************
******	Table A10
***************************************
** Difference in Consumption Transmission: Excluding the Tails of the Sample
**
** Input : $workdir/reg_yr_main
** Output: $outputdir/tablea10.xls (nine columns, robust<k><side>)

use "$workdir/reg_yr_main", clear

// Percentile bin (1-100) of each individual's change in log firm average earnings,
// measured as the second minus the first observation within id
sort id year
by id: generate growth_income_between = log_income_between[2] - log_income_between[1]
xtile ibin = growth_income_between, n(100)
xtset id year

// Same regression on samples that trim k = 1, 2, 5 percentile bins from both tails
// (rightleft), from the bottom only (left), or from the top only (right).
// Only the very first column is written without append.
local mode
foreach side in rightleft left right {
	foreach k in 1 2 5 {
		local top = 100 - `k'
		if "`side'" == "rightleft" local keepif "ibin>`k' & ibin<=`top'"
		if "`side'" == "left"      local keepif "ibin>`k'"
		if "`side'" == "right"     local keepif "ibin<=`top'"

		regress S.logcon S.logincome S.log_income_within if `keepif', vce(cluster id)
		outreg2 using "$outputdir/tablea10.xls", dec(3) cttop(robust`k'`side') `mode'
		local mode append
	}
}


***************************************
******	Table A11
***************************************
** Variances of the Between and Within Components of Income Growth: Grouped by Additional Demographic Characteristics: Age, Gender, Occupation, Education, and Income
**
** Input : $workdir/reg_yr_main
** Output: $outputdir/tablea11 (variance matrix, written by putexcel)

use "$workdir/reg_yr_main",clear

	// Group indicators. Each median is taken over all person-year rows.
	// NOTE(review): rows with a missing grouping variable get indicator = 0 (the "& x < ."
	// term makes the expression 0, not missing) -- confirm this is intended.

*age (at or above the median = 1)
	quietly sum age, detail
	gen highage = age >= r(p50) & age <.

*edu (strictly above the median education code = 1)
	quietly sum education, detail
	gen lowedu = education >  r(p50) & education <.

*occupation (strictly above the median job position code = 1)
	quietly sum ocpclass, detail
	gen highocp = ocpclass >  r(p50) & ocpclass <.

*income (average income at or above the median = 1)
	bysort id: egen inctmp = mean(income)        // average income over sample period
	quietly sum inctmp, detail
	gen highinc = inctmp >= r(p50) & inctmp <.

	// Declare the panel after the bysort above so the S. operator sees data sorted by id year
	xtset id year

	// WSS: row 1 = variance of between-firm income growth, row 3 = variance of within-firm
	// income growth (row 2 stays missing). Columns follow the order of the conditions below:
	// age low/high, gender male/female (male=0, female=1), education low/high,
	// occupation low/high, income low/high.
	capture matrix drop WSS
	matrix WSS = J(3,10,.)

	local col = 0
	foreach cond in "highage==0" "highage==1" "gender==0" "gender==1" "lowedu==1" "lowedu==0" "highocp==0" "highocp==1" "highinc==0" "highinc==1" {
		local ++col
		sum S.log_income_between if `cond'
		matrix WSS[1, `col'] = r(Var)
		sum S.log_income_within if `cond'
		matrix WSS[3, `col'] = r(Var)
	}
	drop highage lowedu highocp highinc

	matrix list WSS
	// replace: without replace or modify, putexcel refuses to write to an existing workbook,
	// which stops the script on a re-run
	putexcel A1=matrix(WSS) using "$outputdir/tablea11", replace




***************************************
******	Table A12
***************************************
** Joint Tests of Channels: Alternative Measure of Self-insurance: Variance Ratio of High/Low-income Groups
**
** Input : $workdir/reg_yr_main, $workdir/jointtest (merged on industryregion)
** Output: $outputdir/tablea12.xls (columns 1-2); correlation matrix shown in the log

use "$workdir/reg_yr_main", clear

// Number the industry x region cells 1, 2, ... (first-row flag, then running sum)
bysort industry region: generate count_industryregion = 1 if _n==1
generate industryregion = sum(count_industryregion)

** X2alt -- varianceratio_inc: Income based measure for the strength of the self-insurance channel

// Each individual's average income over the sample period
bysort id: egen inctmp = mean(income)

xtset id year

foreach v in var_bt_low var_bt_high var_wt_low var_wt_high {
	generate `v' = .
}

levelsof industryregion, local(cells)

// Within each cell, split individuals at the cell's median average income and record the
// variance of between-firm (bt) and within-firm (wt) income growth for each half.
foreach g of local cells {
	quietly summarize inctmp if industryregion==`g', detail
	local cutoff = r(p50)
	foreach part in bt wt {
		local src = cond("`part'"=="bt", "log_income_between", "log_income_within")
		quietly summarize S.`src' if inctmp < `cutoff' & industryregion==`g'
		replace var_`part'_low = r(Var) if industryregion==`g'
		quietly summarize S.`src' if inctmp >= `cutoff' & industryregion==`g'
		replace var_`part'_high = r(Var) if industryregion==`g'
	}
}
// High/low variance ratio of the within component relative to that of the between component
generate varianceratio_inc = (var_wt_high/var_wt_low)/(var_bt_high/var_bt_low)

// One row per cell, merged with the cell-level measures, then standardised
keep industry region industryregion varianceratio_inc
bysort industryregion: keep if _n==1
merge 1:1 industryregion using "$workdir/jointtest"
drop _merge
egen varianceratioinc_sd = std(varianceratio_inc)

// Column 1: original variance-ratio measure; column 2: income-based alternative
regress diff varianceratio_sd [aw = Nworkers], vce(robust)
outreg2 using "$outputdir/tablea12.xls", dec(3) cttop(1)
regress diff varianceratioinc_sd [aw = Nworkers], vce(robust)
outreg2 using "$outputdir/tablea12.xls", dec(3) cttop(2) append

pwcorr persisdiff_sd peeravg_sd varianceratio_sd varianceratioinc_sd


***************************************
******	Table A13
***************************************
** Persistence of Within- and Between-income-cluster Income Components
**
** Input : $workdir/reg_yr_main
** Output: $outputdir/tablea13.xls (columns "withincluster" and "betweencluster")

use "$workdir/reg_yr_main", clear
xtset id year

// Reported columns: each cluster income component on its own one-year lag, standard
// errors clustered by individual. The first column is written without append.
local mode
foreach part in within between {
	regress log_income_`part'_cluster L.log_income_`part'_cluster, vce(cluster id)
	outreg2 using "$outputdir/tablea13.xls", dec(3) cttop(`part'cluster) `mode'
	local mode append
}

// Cross-equation test that the two lag coefficients are equal: the regressions are
// re-estimated on explicit lag variables without a vce() option, stored, and combined
// with suest, which supplies the id-clustered covariance.
foreach part in within between {
	generate L_log_income_`part'_cluster = L.log_income_`part'_cluster
	regress log_income_`part'_cluster L_log_income_`part'_cluster
	estimates store `part'cluster
}
suest withincluster betweencluster, vce(cluster id)
test [withincluster_mean]L_log_income_within_cluster = [betweencluster_mean]L_log_income_between_cluster




***************************************
******	Table A14
***************************************
** Joint Tests of Channels: Accounting for an Interlink Between the Income Persistence Channel and the Peer Effect Channel
**
** Input : $workdir/reg_yr_main, $workdir/jointtest (merged on industryregion)
** Output: $outputdir/tablea14.xls (columns 1-2)

use "$workdir/reg_yr_main", clear

// Number the industry x region cells 1, 2, ... (first-row flag, then running sum)
bysort industry region: generate count_industryregion = 1 if _n==1
generate industryregion = sum(count_industryregion)

** X1alt -- persis_diff_cluster: Measure for the interlink between the income persistence channel and the peer effect channel

xtset id year

generate persis_bc = .
generate persis_wc = .

levelsof industryregion, local(cells)

// Cell-by-cell lag coefficients of the within-cluster (persis_wc) and between-cluster
// (persis_bc) income components. A cell where a regression cannot be run keeps missing.
foreach g of local cells {
	foreach part in within between {
		local target = cond("`part'"=="within", "persis_wc", "persis_bc")
		capture regress log_income_`part'_cluster L.log_income_`part'_cluster if industryregion==`g', vce(cluster id)
		if _rc==0 {
			replace `target' = _b[L.log_income_`part'_cluster] if industryregion==`g'
		}
	}
}
generate persis_diff_cluster = persis_bc - persis_wc

// One row per cell, merged with the cell-level measures, then standardised
keep industry region industryregion persis_diff_cluster
bysort industryregion: keep if _n==1
merge 1:1 industryregion using "$workdir/jointtest"
drop _merge
egen persis_diff_cluster_sd = std(persis_diff_cluster)

// Column 1: the three channels jointly; column 2: adds the cluster persistence difference
regress diff persisdiff_sd varianceratio_sd peeravg_sd [aw = Nworkers], vce(robust)
outreg2 using "$outputdir/tablea14.xls", dec(3) cttop(1)
regress diff persisdiff_sd varianceratio_sd peeravg_sd persis_diff_cluster_sd [aw = Nworkers], vce(robust)
outreg2 using "$outputdir/tablea14.xls", dec(3) cttop(2) append




***************************************
******	Table A15
***************************************
** Potential Effects of the Between-firm Component of Income Growth on Future Income Uncertainty
**
** Input : $workdir/reg_semiannual_main
** Output: $outputdir/tablea15.xls (columns "change", "positive", "negative")

use "$workdir/reg_semiannual_main", clear

	xtset id halfyear // t = 1,2,3,4
	
	gen FS_logincome = FS.logincome // is nonmissing for t=1,2,3
	// Positive / negative parts of next-period income growth. They must stay missing when
	// the growth itself is missing. Stata treats missing as larger than any number, so a
	// bare "replace ... = 0 if growth > 0" would turn missing growth into 0 in the negative
	// part only, and max()/min() would return 0 for a missing argument; hence the
	// explicit !missing() condition.
	gen FS_logincome_positive = max(FS_logincome, 0) if !missing(FS_logincome)
	gen FS_logincome_negative = min(FS_logincome, 0) if !missing(FS_logincome)
	bysort company_no halfyear: egen sd_FS_logincome = sd(FS_logincome) // standard deviation of future income growth
	by     company_no halfyear: egen sd_FS_logincome_positive = sd(FS_logincome_positive) // positive semi standard deviation of future income growth
	by     company_no halfyear: egen sd_FS_logincome_negative = sd(FS_logincome_negative) // negative semi standard deviation of future income growth

	// Re-declare the panel: the bysort above re-sorted the data by company_no halfyear
	xtset id halfyear // t = 1,2,3,4
	gen S_log_income_between = S.log_income_between // current period growth of firm average income (the "between" component of income)
													// is nonmissing for t=2,3,4

	keep if !missing(sd_FS_logincome, S_log_income_between) // is nonmissing for t=2,3

	// Firm-level dispersion of future income growth on current between-firm income growth,
	// with individual fixed effects and standard errors clustered by individual
	areg sd_FS_logincome  		  S_log_income_between ,a(id) vce(cluster id) 
		outreg2 using "$outputdir/tablea15.xls", dec(3) cttop(change) 
	areg sd_FS_logincome_positive S_log_income_between ,a(id) vce(cluster id) 
		outreg2 using "$outputdir/tablea15.xls", dec(3) cttop(positive) append
	areg sd_FS_logincome_negative S_log_income_between ,a(id) vce(cluster id) 
		outreg2 using "$outputdir/tablea15.xls", dec(3) cttop(negative) append



